# Explore topic 13e # First load all of the functions we will use source( "../gnrnd5.R") source( "../gnrnd4.R") source( "../pop_sd.R") source( "../ci_stddev.R") # Topic 13e looks at generating a confidence # interval for a population standard deviation # # Let us generate a population gnrnd5(145898499904, 432003526) # put the population into big_pop big_pop <- L1 # We can look at some of the population head( big_pop, 20) # Then, our interest is to get a 97% confidence interval # for the standard deviation of the population. # To do this we will take a sample # of a certain size samp_size <- 43 # then, just so that we can all get the same # sample, generate the index values for # a sample of that size key1 <- 550910001+ (samp_size-1)*100 gnrnd4(key1, 500000001) L1 this_sample <- big_pop[ L1 ] # look at our sample this_sample # we can find the sample standard deviation samp_sd <- sd( this_sample ) samp_sd # Now, we will use the sample standard deviation as # our point estimate. We know that the distribution # of the ratio of (samp_size-1)*samp_sd^2 to # sigma^2, where sigma is the standard deviation of # the population will be a chi-squared # distribution with samp_size-1 degrees of freedom. # Therefore, for a 97% confidence interval we want to find # the x_left value that has (1-0.97)/2 = 0.015 as the # area to the left of that value, for samp_size-1 # degrees of freedom and the value x_right that has # 0.015 as the area to the right of that value. x_left <- qchisq( 0.015, samp_size-1 ) x_left x_right <- qchisq( 0.015, samp_size-1, lower.tail=FALSE ) x_right # then the left side of the confidence interval # is left_side <- sqrt((samp_size-1)*samp_sd^2/x_right) # and the right side of the confidence interval # is right_side <- sqrt((samp_size-1)*samp_sd^2/x_left) # left_side right_side # ### of course all of this could be done via # our ci_known function ci_stddev(samp_size, samp_sd, 0.97) ################################## # we could try this at a different confidence # level. Just alter the confidence level and # then run the subsequent lines, or just skip # down to line 72 and get the new values #################################### # If we express the confidence level as a # percent then we say that that percent of the # confidence intervals that we generate # using this methodology will contain the # true standard deviation. That means, that at this point # in running the script, I do not know if the # 97% confidence interval that we generated, # namely (35.657, 57.607 ) does or does not # contain the true population standard deviation. # # Let us find the true standard deviation and see if it is # in the interval. sigma <- pop_sd( big_pop ) sigma # yes it is! # This has been an illustration, but let us # go through the process 10000 times and # see how many intervals that we generate this # way contain the true mean # first reset the confidence level and # sample size just in case we want to change # them later conf_level <- 0.97 samp_size <- 43 L3 <- 1:10000 for( i in 1:10000 ) { this_sample <- sample( big_pop, samp_size ) samp_sd <- sd( this_sample ) this_ci <- ci_stddev( samp_size, samp_sd, conf_level) if( this_ci[1] <= sigma & sigma <= this_ci[2] ) { L3[i] = "hit"} else { L3[i] = "missed"} } # see how we did table( L3 ) ######### # if we want we can do this again and we # can even change the values in lines 107 # and/or 108 if we want.